#!/usr/bin/python3
# -*- coding: utf-8 -*-

import requests
import json
import os
from multiprocessing import Pool, freeze_support
import sys
import codecs
import logging


def _to_last_first(name):
    """Return *name* rewritten as ``'Last, First'``.

    A name that already contains a comma is returned unchanged. A name with
    no tokens at all (empty or whitespace-only) is also returned unchanged
    instead of raising ``IndexError`` on the missing last token.
    """
    if ',' in name:
        return name
    tokens = name.split()
    if not tokens:
        return name
    return '%s, %s' % (tokens[-1], ' '.join(tokens[:-1]))


def get_authors_from_conference(abbr, year):
    """Return the author rows for every paper of one conference edition.

    Queries the Microsoft Academic ``evaluate`` endpoint for papers whose
    conference name is *abbr* and whose year is *year*. Results are fetched
    in pages of ``count`` entries, advancing ``offset`` until a page comes
    back with fewer than ``count`` entries.

    Args:
        abbr: Conference abbreviation, inserted into the query as ``C.CN``.
        year: Publication year, inserted into the query as ``Y``.

    Returns:
        A list with one row per (paper, author) pair, each row being
        ``[abbr, year, title, pages, order, name, email, affiliation]``.
        ``pages`` and ``email`` are always empty strings. An empty list is
        returned when any response lacks an ``entities`` key; a message is
        printed to stdout in that case.
    """
    count = 1000          # page size requested per call
    request_timeout = 60  # seconds; keeps a stalled connection from hanging the crawl

    # NOTE(review): this subscription key is committed in source. It should
    # be moved to configuration or the environment, and rotated.
    key = '632667c894cd4187978598134d705272'

    url = 'https://api.labs.cognitive.microsoft.com/academic/v1.0/evaluate'
    headers = {
        'Content-Type': 'application/json',
        'Ocp-Apim-Subscription-Key': key
    }
    payload = {
        'expr': "AND(Composite(C.CN='%s'),Y=%s)" % (abbr, year),
        'model': 'latest',
        'count': count,
        'offset': 0,
        'attributes': 'AA.S,AA.AuN,AA.AfN,C.CN,Ti,Y'
    }

    author_list = []
    while True:
        r = requests.get(url, headers=headers, params=payload,
                         timeout=request_timeout)
        j = r.json()

        if 'entities' not in j:
            # Skip the failed search item. Rows gathered from earlier pages
            # are discarded so the caller never receives a partial list.
            print('%s %s search error!' % (abbr, year))
            return []
        entities = j['entities']

        for paper in entities:
            title = paper['Ti']
            pages = ''
            for author in paper['AA']:
                order = str(author['S'])
                email = ''
                name = _to_last_first(author['AuN'].title())
                affiliation = author.get('AfN', '')
                author_list.append([abbr, year, title, pages,
                                    order, name, email, affiliation])

        # A short page means there is nothing left to fetch. A full page may
        # be followed by more results, so request the next window.
        if len(entities) < count:
            return author_list
        payload['offset'] += count


def _csv_field(value):
    """Return *value* ready to be placed in a comma-separated row.

    Fields containing a comma, a double quote or a line break are wrapped in
    double quotes, with embedded double quotes doubled. Any other field is
    returned unchanged.
    """
    if any(ch in value for ch in ',"\r\n'):
        return '"%s"' % value.replace('"', '""')
    return value


def main():
    """Crawl author lists from Microsoft Academic Graph.

    Reads the file named by the first command-line argument, which holds one
    tab-separated ``abbr<TAB>year`` pair per line, and writes the authors of
    each pair to ``results-cc/<abbr><year>.csv``. Blank lines are skipped.
    Prints a usage message to stderr and returns when no argument is given.
    """
    if len(sys.argv) <= 1:
        print('Usage: MS.py <conference-list>', file=sys.stderr)
        return

    filename = sys.argv[1]
    save_path = 'results-cc/'
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(save_path, exist_ok=True)

    logging.basicConfig(level=logging.INFO)

    with open(filename, 'r', encoding='utf-8') as conference_list:
        for line in conference_list:
            line = line.rstrip()
            if not line:
                # e.g. a trailing empty line at the end of the list
                continue
            abbr, year = line.split('\t')
            print(abbr, year)
            print('[Working] extract %s%s papers' % (abbr, year), file=sys.stderr)
            author_list = get_authors_from_conference(abbr, year)
            if not author_list:
                print('[Warning] %s%s papers not found' % (abbr, year), file=sys.stderr)
            # Output csv format: abbr, year, title, pages, order, author, email, affiliation
            csv_filename = '%s%s.csv' % (abbr, year)
            # Byte mode + BOM so that Excel detects the file as UTF-8.
            with open(save_path + csv_filename, 'wb') as out:
                out.write(codecs.BOM_UTF8)
                rows = (','.join(_csv_field(field) for field in author_info)
                        for author_info in author_list)
                out.write('\n'.join(rows).encode('utf-8'))
            print('[Done] %s written' % csv_filename, file=sys.stderr)
            print('', file=sys.stderr)


# Script entry point: the guarded calls run only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    # freeze_support() is called before main(), as multiprocessing expects
    # for programs that may be frozen into a Windows executable.
    freeze_support()
    main()
